

<!DOCTYPE html>
<html class="writer-html5" lang="en" >
<head>
  <meta charset="utf-8" />

  <meta name="viewport" content="width=device-width, initial-scale=1.0" />

  <title>Datasets &mdash; Openspeech v0.3.0 documentation</title>



  <link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
  <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />










  <!--[if lt IE 9]>
    <script src="../_static/js/html5shiv.min.js"></script>
  <![endif]-->


      <script type="text/javascript" id="documentation_options" data-url_root="../" src="../_static/documentation_options.js"></script>
        <script src="../_static/jquery.js"></script>
        <script src="../_static/underscore.js"></script>
        <script src="../_static/doctools.js"></script>
        <script src="../_static/language_data.js"></script>

    <script type="text/javascript" src="../_static/js/theme.js"></script>


    <link rel="index" title="Index" href="../genindex.html" />
    <link rel="search" title="Search" href="../search.html" />
    <link rel="next" title="Data Loaders" href="Data Loaders.html" />
    <link rel="prev" title="Feature Transform" href="Feature Transform.html" />
</head>

<body class="wy-body-for-nav">


  <div class="wy-grid-for-nav">

    <nav data-toggle="wy-nav-shift" class="wy-nav-side">
      <div class="wy-side-scroll">
        <div class="wy-side-nav-search" >



            <a href="../index.html" class="icon icon-home"> Openspeech



          </a>







<div role="search">
  <form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
    <input type="text" name="q" placeholder="Search docs" />
    <input type="hidden" name="check_keywords" value="yes" />
    <input type="hidden" name="area" value="default" />
  </form>
</div>


        </div>


        <div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="main navigation">






              <p class="caption"><span class="caption-text">GETTING STARTED</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../notes/intro.html">Introduction</a></li>
<li class="toctree-l1"><a class="reference internal" href="../notes/hydra_configs.html">Openspeech’s Hydra configuration</a></li>
<li class="toctree-l1"><a class="reference internal" href="../notes/configs.html">Openspeech’s configurations</a></li>
</ul>
<p class="caption"><span class="caption-text">OPENSPEECH MODELS</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../models/Openspeech Model.html">Openspeech Model</a></li>
<li class="toctree-l1"><a class="reference internal" href="../models/Openspeech CTC Model.html">Openspeech CTC Model</a></li>
<li class="toctree-l1"><a class="reference internal" href="../models/Openspeech Encoder Decoder Model.html">Openspeech Encoder Decoder Model</a></li>
<li class="toctree-l1"><a class="reference internal" href="../models/Openspeech Transducer Model.html">Openspeech Transducer Model</a></li>
<li class="toctree-l1"><a class="reference internal" href="../models/Openspeech Language Model.html">Openspeech Language Model</a></li>
</ul>
<p class="caption"><span class="caption-text">MODEL ARCHITECTURES</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../architectures/Conformer.html">Conformer</a></li>
<li class="toctree-l1"><a class="reference internal" href="../architectures/ContextNet.html">ContextNet</a></li>
<li class="toctree-l1"><a class="reference internal" href="../architectures/DeepSpeech2.html">DeepSpeech2</a></li>
<li class="toctree-l1"><a class="reference internal" href="../architectures/Jasper.html">Jasper</a></li>
<li class="toctree-l1"><a class="reference internal" href="../architectures/Listen Attend Spell.html">Listen Attend Spell Model</a></li>
<li class="toctree-l1"><a class="reference internal" href="../architectures/LSTM LM.html">LSTM Language Model</a></li>
<li class="toctree-l1"><a class="reference internal" href="../architectures/QuartzNet.html">QuartzNet Model</a></li>
<li class="toctree-l1"><a class="reference internal" href="../architectures/RNN Transducer.html">RNN Transducer Model</a></li>
<li class="toctree-l1"><a class="reference internal" href="../architectures/Transformer.html">Transformer Model</a></li>
<li class="toctree-l1"><a class="reference internal" href="../architectures/Transformer LM.html">Transformer Language Model</a></li>
<li class="toctree-l1"><a class="reference internal" href="../architectures/Transformer Transducer.html">Transformer Transducer Model</a></li>
</ul>
<p class="caption"><span class="caption-text">CORPUS</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../corpus/AISHELL-1.html">AISHELL</a></li>
<li class="toctree-l1"><a class="reference internal" href="../corpus/KsponSpeech.html">KsponSpeech</a></li>
<li class="toctree-l1"><a class="reference internal" href="../corpus/LibriSpeech.html">LibriSpeech</a></li>
</ul>
<p class="caption"><span class="caption-text">LIBRARY REFERENCE</span></p>
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="Callback.html">Callback</a></li>
<li class="toctree-l1"><a class="reference internal" href="Criterion.html">Criterion</a></li>
<li class="toctree-l1"><a class="reference internal" href="Data Augment.html">Data Augment</a></li>
<li class="toctree-l1"><a class="reference internal" href="Feature Transform.html">Feature Transform</a></li>
<li class="toctree-l1 current"><a class="current reference internal" href="#">Datasets</a><ul>
<li class="toctree-l2"><a class="reference internal" href="#module-openspeech.data.audio.dataset">Speech To Text Dataset</a></li>
<li class="toctree-l2"><a class="reference internal" href="#module-openspeech.data.text.dataset">Text Dataset</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="Data Loaders.html">Data Loaders</a></li>
<li class="toctree-l1"><a class="reference internal" href="Decoders.html">Decoders</a></li>
<li class="toctree-l1"><a class="reference internal" href="Encoders.html">Encoders</a></li>
<li class="toctree-l1"><a class="reference internal" href="Modules.html">Modules</a></li>
<li class="toctree-l1"><a class="reference internal" href="Optim.html">Optim</a></li>
<li class="toctree-l1"><a class="reference internal" href="Search.html">Search</a></li>
<li class="toctree-l1"><a class="reference internal" href="Tokenizers.html">Tokenizers</a></li>
<li class="toctree-l1"><a class="reference internal" href="Metric.html">Metric</a></li>
</ul>



        </div>

      </div>
    </nav>

    <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap">


      <nav class="wy-nav-top" aria-label="top navigation">

          <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
          <a href="../index.html">Openspeech</a>

      </nav>


      <div class="wy-nav-content">

        <div class="rst-content">



















<div role="navigation" aria-label="breadcrumbs navigation">

  <ul class="wy-breadcrumbs">

      <li><a href="../index.html" class="icon icon-home"></a> &raquo;</li>

      <li>Datasets</li>


      <li class="wy-breadcrumbs-aside">


            <a href="../_sources/modules/Datasets.rst.txt" rel="nofollow"> View page source</a>


      </li>

  </ul>


  <hr/>
</div>
          <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
           <div itemprop="articleBody">

  <div class="section" id="datasets">
<h1>Datasets<a class="headerlink" href="#datasets" title="Permalink to this headline">¶</a></h1>
<div class="section" id="module-openspeech.data.audio.dataset">
<span id="speech-to-text-dataset"></span><h2>Speech To Text Dataset<a class="headerlink" href="#module-openspeech.data.audio.dataset" title="Permalink to this headline">¶</a></h2>
<dl class="py class">
<dt id="openspeech.data.audio.dataset.SpeechToTextDataset">
<em class="property">class </em><code class="sig-prename descclassname">openspeech.data.audio.dataset.</code><code class="sig-name descname">SpeechToTextDataset</code><span class="sig-paren">(</span><em class="sig-param"><span class="n">configs</span><span class="p">:</span> <span class="n">omegaconf.dictconfig.DictConfig</span></em>, <em class="sig-param"><span class="n">dataset_path</span><span class="p">:</span> <span class="n"><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.9)">str</a></span></em>, <em class="sig-param"><span class="n">audio_paths</span><span class="p">:</span> <span class="n"><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.9)">list</a></span></em>, <em class="sig-param"><span class="n">transcripts</span><span class="p">:</span> <span class="n"><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.9)">list</a></span></em>, <em class="sig-param"><span class="n">sos_id</span><span class="p">:</span> <span class="n"><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.9)">int</a></span> <span class="o">=</span> <span class="default_value">1</span></em>, <em class="sig-param"><span class="n">eos_id</span><span class="p">:</span> <span class="n"><a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.9)">int</a></span> <span class="o">=</span> <span class="default_value">2</span></em>, <em class="sig-param"><span class="n">del_silence</span><span class="p">:</span> <span class="n"><a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.9)">bool</a></span> <span class="o">=</span> <span class="default_value">False</span></em>, <em class="sig-param"><span class="n">apply_spec_augment</span><span class="p">:</span> <span class="n"><a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.9)">bool</a></span> <span class="o">=</span> <span class="default_value">False</span></em>, <em class="sig-param"><span class="n">apply_noise_augment</span><span class="p">:</span> <span class="n"><a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.9)">bool</a></span> <span class="o">=</span> <span class="default_value">False</span></em>, <em class="sig-param"><span class="n">apply_time_stretch_augment</span><span class="p">:</span> <span class="n"><a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.9)">bool</a></span> <span class="o">=</span> <span class="default_value">False</span></em>, <em class="sig-param"><span class="n">apply_joining_augment</span><span class="p">:</span> <span class="n"><a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.9)">bool</a></span> <span class="o">=</span> <span class="default_value">False</span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/openspeech/data/audio/dataset.html#SpeechToTextDataset"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#openspeech.data.audio.dataset.SpeechToTextDataset" title="Permalink to this definition">¶</a></dt>
<dd><p>Dataset for audio &amp; transcript matching</p>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p>Do not use this class directly, use one of the sub classes.</p>
</div>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>dataset_path</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#str" title="(in Python v3.9)"><em>str</em></a>) – path of librispeech dataset</p></li>
<li><p><strong>audio_paths</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.9)"><em>list</em></a>) – list of audio path</p></li>
<li><p><strong>transcripts</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.9)"><em>list</em></a>) – list of transript</p></li>
<li><p><strong>sos_id</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.9)"><em>int</em></a>) – identification of &lt;startofsentence&gt;</p></li>
<li><p><strong>eos_id</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#int" title="(in Python v3.9)"><em>int</em></a>) – identification of &lt;endofsentence&gt;</p></li>
<li><p><strong>del_silence</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.9)"><em>bool</em></a>) – flag indication whether to apply delete silence or not</p></li>
<li><p><strong>apply_spec_augment</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.9)"><em>bool</em></a>) – flag indication whether to apply spec augment or not</p></li>
<li><p><strong>apply_noise_augment</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.9)"><em>bool</em></a>) – flag indication whether to apply noise augment or not</p></li>
<li><p><strong>apply_time_stretch_augment</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.9)"><em>bool</em></a>) – flag indication whether to apply time stretch augment or not</p></li>
<li><p><strong>apply_joining_augment</strong> (<a class="reference external" href="https://docs.python.org/3/library/functions.html#bool" title="(in Python v3.9)"><em>bool</em></a>) – flag indication whether to apply audio joining augment or not</p></li>
</ul>
</dd>
</dl>
</dd></dl>

</div>
<div class="section" id="module-openspeech.data.text.dataset">
<span id="text-dataset"></span><h2>Text Dataset<a class="headerlink" href="#module-openspeech.data.text.dataset" title="Permalink to this headline">¶</a></h2>
<dl class="py class">
<dt id="openspeech.data.text.dataset.TextDataset">
<em class="property">class </em><code class="sig-prename descclassname">openspeech.data.text.dataset.</code><code class="sig-name descname">TextDataset</code><span class="sig-paren">(</span><em class="sig-param"><span class="n">transcripts</span><span class="p">:</span> <span class="n"><a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.9)">list</a></span></em>, <em class="sig-param"><span class="n">tokenizer</span></em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/openspeech/data/text/dataset.html#TextDataset"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#openspeech.data.text.dataset.TextDataset" title="Permalink to this definition">¶</a></dt>
<dd><p>Dataset for language modeling.</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
<dd class="field-odd"><ul class="simple">
<li><p><strong>transcripts</strong> (<a class="reference external" href="https://docs.python.org/3/library/stdtypes.html#list" title="(in Python v3.9)"><em>list</em></a>) – list of transcript</p></li>
<li><p><strong>tokenizer</strong> (<a class="reference internal" href="Tokenizers.html#openspeech.tokenizers.tokenizer.Tokenizer" title="openspeech.tokenizers.tokenizer.Tokenizer"><em>Tokenizer</em></a>) – tokenizer is in charge of preparing the inputs for a model.</p></li>
</ul>
</dd>
</dl>
</dd></dl>

</div>
</div>


           </div>

          </div>
          <footer>
    <div class="rst-footer-buttons" role="navigation" aria-label="footer navigation">
        <a href="Data Loaders.html" class="btn btn-neutral float-right" title="Data Loaders" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right" aria-hidden="true"></span></a>
        <a href="Feature Transform.html" class="btn btn-neutral float-left" title="Feature Transform" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left" aria-hidden="true"></span> Previous</a>
    </div>

  <hr/>

  <div role="contentinfo">
    <p>
        &#169; Copyright 2021, Kim, Soohwan and Ha, Sangchun and Cho, Soyoung.

    </p>
  </div>



    Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a

    <a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>

    provided by <a href="https://readthedocs.org">Read the Docs</a>.

</footer>
        </div>
      </div>

    </section>

  </div>


  <script type="text/javascript">
      jQuery(function () {
          SphinxRtdTheme.Navigation.enable(true);
      });
  </script>






</body>
</html>